###########################################################################
###########################################################################
### ###
### Project: 
### On the Demand for Telemedicine: Evidence from the COVID-19 Pandemic ###
### ###
###########################################################################
###########################################################################
#Install packages and open libraries
install.packages("easypackages")
library(easypackages)
#install_packages('plyr', "dplyr", "readr", "tidyverse","mice", "rvest", "plotly", "haven", "scales", "lubridate",
#                 "plm", "lmtest", "stargazer", "data.table", "broom", "texreg","GGally", "readxl", "estimatr", "stringr", "car")
libraries('plyr', "dplyr", "readr", "tidyverse","mice", "rvest", "plotly", "haven", "scales", "lubridate",
                 "plm", "lmtest", "stargazer", "data.table", "broom", "texreg","GGally", "readxl", "estimatr", "stringr", "car")
#When askedDo you want to install from sources the package which needs compilation? (Yes/no/cancel): type no
install.packages("remotes")
library(remotes)
remotes::install_github("ChandlerLutz/starpolishr")
###################################################################
### Upload data
###################################################################
setwd(path.expand("~/Dropbox/Llamando al Doctor/text/Health Economics/Harvard Dataverse/Data"))
DF <- read_csv("Micro_Data.csv")
applemobilitytrends <- read_csv("applemobilitytrends-2021-07-31.csv") #Apple Data
moovit <- read_csv("Moovit_Data_6_21.csv") #Moovit data 
provider_data <- read_csv("provider.csv") #Large provider data
setwd(path.expand("~/Dropbox/Llamando al Doctor/text/Health Economics/Harvard Dataverse/Output"))
###################################################################
### User defined functions 
###################################################################
percent <- function(x, digits = 1, format = "f", ...) {    #To format results to percentages
  paste0(formatC(x * 100, format = format, digits = digits, ...), "%")}

logplus <- function(x) { #Log transformation for variables that take value 0
  y <- log(x + 1)
  return(y)}

na.zero <- function (x) { #Function that changes NA's to 0 values in a dataframe
  x[is.na(x)] <- 0
  return(x)
}
###################################################################################
#######      Table 1: Telemedicine Service 2019 Descriptive Statistics       ######
###################################################################################
##################################################################
#Step1) Filter data to 2019 and generate interest variables
###################################################################
DF_2019 <- DF %>% 
  filter(año_llamada == 2019) #We have 10340 obs. for 2019 
DF_2019 <- DF_2019 %>%
  mutate(Resolved = ifelse(status == "Resolved",1,0),
         Prescription = ifelse(RECETA=="Si",1,0),
         `Follow-up` = ifelse(status == "Follow-up",1,0),
         Referral = ifelse(status=="Referral",1,0),
         `General Medicine` = ifelse(Specialty=="General Medicine",1,0),
         `Ob/Gyn` = ifelse(Specialty == "Ob/Gyn",1,0),
         Pediatrics = ifelse(Specialty == "Pediatrics",1,0),
         Male = ifelse(GENERO == "M",1,0),
         `Previously Diagnosed` = pre_existing,
         Age = EDAD)
###################################################################
#Step2) Calculating summary statistics 
###################################################################
tab <- lapply(DF_2019[14:23], function(dv){ #Loop that summarizes var statistics
  line <- DF_2019 %>% 
    summarize(Average=mean(dv), `Std Dev.` = sd(dv), `Obs.` = sum(dv))
})
tab <- data.frame(do.call(rbind, tab)) #Editing table for export and labeling columns
tab <- setDT(tab, keep.rownames = TRUE)[]
tab <- tab %>% 
  mutate(Average = percent(Average), `Std.Dev.` = percent(`Std.Dev.`))
tab <- tab %>% #Since Age is not a percentage, calculate stats separately
  filter(!rn == "Age")
line <- DF_2019 %>% 
  summarize(Average= round(mean(Age),1),`Std.Dev.` = round(sd(Age),1), `Obs.` = n())
rn <- "Age"
line <- cbind(rn,line)
tab <- rbind(tab,line)
###################################################################
#Step2) Organize and print results 
###################################################################
tab <- tab %>% #Change the order of the variables to group them according to their categories
  rename(Var = rn) %>% 
  mutate(Var= factor(Var, levels = c("Resolved","Prescription","Follow-up","Referral",
                                     'General Medicine',"Ob/Gyn","Pediatrics", "Age",
                                     "Male", "Previously Diagnosed")))
tab <- tab[order(tab$Var),] #Order variables
print(xtable(tab),type="latex", floating = FALSE, include.rownames = FALSE, file="Tab1_descriptives.tex") #Output1
rm(DF_2019,line,tab,rn) #Clean environment 

###################################################################################
#######      #Figure 1: Mobility Indicators in Argentina for 2020       ######
###################################################################################
##################################################################
#Step1) Clean data from Apple and Moovit 
###################################################################
Apple_raw <-  applemobilitytrends %>%  #Cleaning Apple data
  filter(region == "Argentina" | country == "Argentina") %>% 
  tidyr::gather(Date, Transit, "2020-01-13":"2021-07-31", factor_key = TRUE) %>% 
  filter(geo_type == "country/region") %>% 
  select("Date", "Transit", "transportation_type") %>% 
  mutate(Date = as.Date(Date),
         Week = lubridate::week(ymd(Date)),
         Week_con = ifelse(Date >= as.Date("2021-01-01"), Week+53,Week)) %>% 
  group_by(Week_con, transportation_type) %>% 
  mutate(Transit_week = mean(Transit, na.rm = TRUE)) %>% 
  ungroup()
Apple_raw <- Apple_raw %>% 
  dplyr::rename(measure = transportation_type) 
rm(applemobilitytrends)
Apple_data <- Apple_raw %>% 
  dplyr::select(Date, Transit, measure) %>% 
  tidyr::spread(measure, Transit) %>% 
  dplyr::rename(Apple_driving = driving, Apple_walking = walking, fecha_llamada = Date) %>% 
  mutate(fecha_llamada= as.Date(fecha_llamada))
moovit_Arg <- moovit %>%  #Cleaning Moovit data
  filter(Country== "Argentina") %>% 
  mutate(Change = substr(Change, 1, nchar(Change)-1), 
         Change = as.numeric(Change)) 
moovit_Arg <- moovit_Arg %>% 
  mutate(Date = as.Date(Date, format = "%m/%d/%Y"))
moovit_raw <- moovit_Arg %>% 
  select("Date", "Change")
rm(moovit, moovit_Arg)
moovit_raw <- moovit_raw %>% 
  mutate(Week = lubridate::week(ymd(Date)),
         Week_con = ifelse(Date >= as.Date("2021-01-01"), Week+53,Week),
         change_100 = 100 + Change,
         measure = "Public Transit") %>% 
  rename(Transit = change_100) %>% 
  group_by(Week_con) %>% 
  mutate(Transit_week = mean(Transit, na.rm = TRUE)) %>% 
  ungroup()  %>% 
  select(-c("Change"))
##################################################################
#Step2) Merging data and prepping for graphs
###################################################################
Mobility <- rbind(Apple_raw, moovit_raw) #Merging both databases 
Mobility <- Mobility %>% 
  mutate(year = format(as.Date(Mobility$Date), format = "%Y"),
         Week = ifelse(year == "2021", Week + 52, Week), 
         Week = ifelse(Week >=105, 53, Week),
         Month = format(as.Date(Date),"%m-%Y"),
         Transit_week = ifelse(Week == 2, 100, Transit_week), #Making Transit week start at 100 in base week 2 (only for Apple data)
         measure = ifelse(measure == "driving", "Driving", ifelse(measure == "walking", "Walking", measure)))
new <- data.frame(Week = 2, measure = "Public Transit", Transit_week = 100) #Add a 2nd week to (for moovit data)
##^^^ Above manipulation is done since weekly averages are displayed in the graphs but data is obtained daily
Mobility <- rbind.fill(Mobility, new)
rm(Apple_data, Apple_raw, moovit_raw, new)
##################################################################
#Step3) Plot Mobility data 
###################################################################
pdf("Fig1_Mobility.pdf") #Output2
mobility <- Mobility %>% 
  filter(Week >=2 & Week <=53) %>% 
  ggplot(aes(x= Week, y= Transit_week, group = measure)) + 
  geom_line(aes(color = measure)) +
  scale_x_continuous(expand = c(0, 0.5), breaks = c(2,5,9,14,18,22,27,31,35,40,44,48,52.5), 
                     labels=c("Jan-01", "Feb-01", "Mar-01", "Apr-01", "May-01", "Jun-01","Jul-01", 
                              "Aug-01", "Sept-01","Oct-01", "Nov-01", "Dec-01","Dec-31")) + 
  theme(text=element_text(family="Times", size=12),legend.title = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black")) +
  scale_y_continuous(limits = c(2, max(Mobility$Transit_week)), breaks = seq(0,140,by=20)) +
  theme(legend.position="bottom") +
  ylab("Mobility(Base Jan 2020 = 100)") +
  xlab("") +
  geom_vline(xintercept = 11, linetype="dotted") +
  geom_hline(yintercept = 100, linetype = "dashed", color = "brown3" ) +
  scale_color_manual(values=c("#E69F00", "#56B4E9", "#999999")) +
  theme(axis.text.x = element_text(angle = 90)) 
plot(mobility)
dev.off()
Mobility <- Mobility %>%  #Preping Mobility data for the Event Study graph
  mutate(fecha_llamada = as.Date(Date)) %>% 
  group_by(Week_con) %>% 
  mutate(average_mob= mean(Transit,na.rm = TRUE)) %>% 
  ungroup()
Mobility <- Mobility %>% #Since mob started second/third week (Graphing purposes)
  mutate(weekday_llamada = weekdays(fecha_llamada),
         Week = ifelse(weekday_llamada== "Monday"| weekday_llamada == "Tuesday", Week-1, Week))
Mob <- Mobility %>% 
  select(Week, average_mob)
Mob <- Mob[!duplicated(Mob$Week), ]
Mob <- Mob %>% 
  filter(Week<=53)
###################################################################################
#######                 Figure 2: Event-study Analysis       ######
###################################################################################
##################################################################
#Step1) Generate outcome variables
###################################################################
DF <- DF %>%  #Count number of calls per day
  group_by(fecha_llamada) %>% 
  mutate(calls = n()) %>% 
  ungroup()
DF_callers <- DF[order(DF$fecha_llamada),] #Keep unique values for ID_PACIENTE
DF_callers <- DF_callers[!duplicated(DF_callers$ID_PATIENT), ]
DF_callers <- DF_callers %>%  #Count first time callers each day
  group_by(fecha_llamada) %>% 
  mutate(callers = n()) %>% 
  ungroup() 
DF_calls <- DF[!duplicated(DF$fecha_llamada), ] #Keep one observation per date 
DF_callers <- DF_callers[!duplicated(DF_callers$fecha_llamada), ]
###########################################################################
#Step2) Construct a single df with the variables needed for the regression
###########################################################################
DF_calls <- DF_calls %>% #Filter variables used in regressions
  dplyr::select(fecha_llamada, año_llamada, Week, weekday_llamada, Week_con, calls)
DF_callers <- DF_callers %>% 
  dplyr::select(fecha_llamada, callers)
DF_working <- merge(DF_calls, DF_callers, by = "fecha_llamada") #Merge into single dataframe
rm(DF_calls, DF_callers) #Clean environment 
###########################################################################
#Step 3) Generate variables for regresion in working DF
###########################################################################
DF_working <- DF_working %>%  #Reg variables  
  mutate(Post = ifelse(Week >= 11, 1,0), #Defining treatment period
         Treat_year = ifelse(año_llamada >2019,1,0), #Defining treatment year (we only have data for 2019/2020)
         PostxYear2020 = Post*Treat_year) #Creating interaction term 
DF_working <- DF_working %>% #Generating dummies for week_txYear2020 by interating week with year2020 and generating a factor variable
  mutate(WeekxYear = Week* Treat_year,
         weekxyear = as.character(WeekxYear)) 
###########################################################################
#Step 4) Run Event Study regressions
###########################################################################
DF_working <- within(DF_working, weekxyear <- relevel(factor(weekxyear), ref = "5")) #Use week 5 as reference week
df2 <- DF_working %>%  #Filtering data to exclude weeks without info 
  filter(!Week %in% c(6,7,8,17))
Call_ES<- lm_robust(log(calls) ~ weekxyear  + factor(Week) + weekday_llamada, data = df2, se_type = "HC1") #Event Study for calls
Caller_ES<- lm_robust(log(callers) ~ weekxyear + factor(Week) + weekday_llamada, data = df2, se_type = "HC1")#Event Study for callers
###^^^Term weekxyear0 is a variable that takes value 1 if the year of the observation belongs to 2019, which effectively includes
### a year F.E to our analysis. SE specification HC1 was use to replicate stata results 
####################################################################
#Step 5) Storing regression coefficients and relate to mobility data 
####################################################################
#-----For Calls-------
tbl <- head(broom::tidy(Call_ES, conf.int = TRUE), 50) #Saving coefficients for week dummies in a df
tbl$Number <- as.numeric(parse_number(tbl$term)) #Formating to merge with mobility data
tbl$term <- tbl$Number #Keep only coefficients for term weekxyear for weeks 1 to 53 
tbl <- tbl %>% 
  filter(term != 0)
tbl <- tbl %>% #Prep data to merge with mobility data 
  rename(Week = Number)
merge_t_week <- merge(Mob, tbl, by= "Week", all = TRUE) #Merge with mobility data
#-----For First Time Callers -------
zbl <- head(broom::tidy(Caller_ES, conf.int = TRUE), 50) 
zbl$Number <- as.numeric(parse_number(zbl$term))
zbl$term <- zbl$Number
zbl <- zbl %>% 
  filter(term != 0)
zbl <- zbl %>% 
  filter(term != 0)
zbl <- zbl %>% 
  rename(Week = Number)
merge_z_week <- merge(Mob, zbl, by= "Week", all = TRUE)
###################################################################
#Step 6) Defining graphs paramenters and graphing
###################################################################
coeff <-22 #Define relation reatio between mobility data and event study estimates 
estimate_col = "dodgerblue3" #Color of event study estimates
average_col = "seagreen" #Color of mobility time series
breaks = c(1:53) #Define breaks from 1 to 53 corresponding to the number of weeks
pdf("Fig2_calls.pdf") #Output 3
ggplot(merge_t_week, aes(x= Week, y = estimate)) +
  scale_y_continuous(name= "Point Estimate Values",
                     sec.axis = sec_axis( trans=~.*22, name="Trends in Mobility")) +
  theme_ipsum()

testimate <- ggplot(merge_t_week, aes(x=Week, y =estimate)) +
  geom_point()+
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high), color = estimate_col) +
  geom_line(aes(y=average_mob/coeff), color = average_col)+
  scale_y_continuous(
    name="Point Estimate for log(Calls)",
    sec.axis = sec_axis(~.*coeff,name = "Mobility Indicator", labels = function(x) paste0(x, "%"))) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"))+
  scale_x_continuous(expand = c(0.01,0),breaks =breaks) +
  geom_vline(xintercept=11, linetype="dashed", 
             color = "grey", size=0.5) +
  geom_hline(yintercept = 0, linetype = "dashed", 
             color = "red", size = 0.5) + 
  theme(axis.text.x = element_text(angle = 90))
plot(testimate)
dev.off()
pdf("Fig2_callers.pdf") #Output 4
ggplot(merge_z_week, aes(x= Week, y = estimate)) +
  scale_y_continuous(name= "Point Estimate Values",
                     sec.axis = sec_axis( trans=~.*22, name="Trends in Mobility")) +
  theme_ipsum()
zestimate <- ggplot(merge_z_week, aes(x=Week, y =estimate)) +
  geom_point()+
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high), color = estimate_col) +
  geom_line(aes(y=average_mob/coeff), color = average_col)+
  scale_y_continuous(
    name="Point Estimate for log(First Time Callers)",
    sec.axis = sec_axis(~.*coeff,name = "Mobility Indicator", labels = function(x) paste0(x, "%"))) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"))+
  scale_x_continuous(expand = c(0.01,0),breaks =breaks) +
  geom_vline(xintercept=11, linetype="dashed", 
             color = "grey", size=0.5) +
  geom_hline(yintercept = 0, linetype = "dashed", 
             color = "red", size = 0.5) + 
  theme(axis.text.x = element_text(angle = 90))
plot(zestimate)
dev.off()
rm(merge_t_week, merge_z_week, testimate, zestimate) #Cleaning environment 
###################################################################################
#######   Table 2:  Difference-in-Differences Estimates     ######
###################################################################################
####################################################################
#Step 1) Run regressions for main results
####################################################################
DD_calls <- lm(log(calls) ~ PostxYear2020 +weekday_llamada + factor(año_llamada) + factor(Week), data = df2) #for calls
DD_first <- lm(log(callers) ~ PostxYear2020 + weekday_llamada + factor(año_llamada) + factor(Week), data = df2) #for callers
####################################################################
#Step 2) Count the number of calls by call resolutions
####################################################################
DF <- DF %>% #group by date and call resolution
  group_by(status, fecha_llamada) %>% 
  mutate(count_status = n()) %>% 
  ungroup()
####################################################################
#Step 3) Generate a variable per resolution with count 
####################################################################
DF$row_num <- seq.int(nrow(DF))  #Generate call id, (for multiple calls of same caller in a day)
DF_results <- DF %>%  #Generate variables with the count of call resolutions
  tidyr::spread(status, count_status) #key=status, value = count_status
DF_results<- DF_results %>%  #Generate var for calls with prescriptions
  mutate(Prescription =ifelse(RECETA == "Si",1,0)) %>% 
  group_by(fecha_llamada) %>% 
  mutate(Prescription = sum(Prescription, rm.na = TRUE))
#^^^ Our new dataframe has a column for each resolution taking the value of count_status by day
#When caller or date is unavailable it takes value NA
DF_results <- DF_results %>% #Keep grouping variables
  dplyr::select(ID_CLIENT, ID_PATIENT, año_llamada, fecha_llamada, Week, weekday_llamada,
                Disconnected, `Follow-up`, Resolved, Referral, Prescription, calls)
DF_results <- DF_results %>%  #Change NA's to 0
  mutate(Disconnected = ifelse(is.na(Disconnected) == TRUE, 0,Disconnected),
         `Follow-up` = ifelse(is.na(`Follow-up`)== TRUE, 0, `Follow-up`),
         Resolved = ifelse(is.na(Resolved) == TRUE, 0, Resolved),
         Referral = ifelse(is.na(Referral) == TRUE, 0, Referral))
##^^^ When we don't observe a call outcome during a day we generate an NA with the spread function
### The lack of observations means that for that day the resolution count is 0 thus we change NA' to 0. 
DF_results <- DF_results %>%  # Before collapsing per day, all observations within a day take the max value of count_status
  group_by(fecha_llamada) %>% 
  mutate(Disconnected = max(Disconnected), 
         `Follow-up` = max(`Follow-up`), 
         Resolved = max(Resolved), 
         Referral = max(Referral),
         Prescription = max(Prescription)) %>% 
  ungroup()
DF_results <- DF_results %>% #Keep on observation per day
  distinct(fecha_llamada,  .keep_all = TRUE)
DF_results <- DF_results %>% #Keep variables of resolutions and date (as key)
  dplyr::select(fecha_llamada, Disconnected, `Follow-up`, Resolved, Referral, Prescription)  
####################################################################
#Step 4) Merge with our working df 
####################################################################
DF_working<- merge(DF_working, DF_results, by = "fecha_llamada") #merge call resolutions to our working dataframe
####################################################################
#Step 5) Run regressions for results & export result table 
####################################################################
DF_working <- DF_working %>%  #Transform dependent variables into log, adding 1 to variables that take value 0
  mutate(log_Resolved = log(Resolved), log_Prescription = log(Prescription),
         `log_Follow_up` = log(`Follow-up`+1), log_Referral = log(Referral+1))
df2 <- DF_working %>%  #filter weeks with no data 
  filter(!Week %in% c(6,7,8,17))
depvar2_log = c("log_Resolved", "log_Prescription", "`log_Follow_up`", "log_Referral") #define dependent variables
for(i in 1:length(depvar2_log)){ #Loop the same regression on all our resolution variables
  model <- paste("model_log", i, sep="")
  m <- lm(as.formula(paste(depvar2_log[i], "~PostxYear2020 + weekday_llamada + factor(año_llamada) + factor(Week)")), data = df2)
  assign(model, m)
}
models <- c("DD_calls", "DD_first" , "model_log1", "model_log2", "model_log3", "model_log4")  #define models to print
for(i in 1:length(models)){ #Calculate robust standard errors 
  name <- paste("robust.se", i,sep = "")
  cov <- vcovHC(get(models[i]), type = "HC0")
  robust.se <- sqrt(diag(cov))
  assign(name, robust.se )}
DF_working_pre <- DF_working %>%  #Df before post period to calculate averages before 
  filter(Post == 0)
stargazer(DD_calls, DD_first, model_log1, model_log2, model_log3,model_log4,
          se = list(robust.se1, robust.se2,robust.se3,robust.se4,robust.se5,robust.se6),
          column.labels = c("Calls", "Callers", "Resolved", "Prescription", "Follow-up", "Referral"), 
          type= "latex", dep.var.labels.include = FALSE, model.names = TRUE, model.numbers =FALSE,
          omit = c("Week", "weekday_llamada"),omit.labels = c("Week F.E","Day of Week F.E"), float = FALSE,
          keep = c("PostxYear2020"), dep.var.caption  = "", style = "jpam", omit.stat = c("f", "rsq", "ser"),
          add.lines=list(c('Average Pre', round(mean(DF_working_pre$calls),3),
                           round(mean(DF_working_pre$callers),3),
                           round(mean(DF_working_pre$Resolved),3),
                           round(mean(DF_working_pre$Prescription),2),
                           round(mean(DF_working_pre$`Follow-up`),2),
                           round(mean(DF_working_pre$Referral),2))),out = "Tab2_DiD.tex") #Output
rm(DF_results, m, cov, DF_results, model, models, name, robust.se, #Cleaning environment
   robust.se1, robust.se2, robust.se3, robust.se4, robust.se5,robust.se6)
###################################################################################
#######  #Figure 3:  Treatment Effects:  Heterogeneity              ######
###################################################################################
####################################################################
#Step1) count calls by subgroup for calls and first time callers 
####################################################################
DF_callers <- DF[order(DF$fecha_llamada),] #Keep unique values for ID_PATIENT
DF_callers <- DF_callers[!duplicated(DF_callers$ID_PATIENT), ]
DF_callers <- DF_callers %>%  #Count the number of daily first time callers 
  group_by(fecha_llamada) %>% 
  mutate(Baseline = n()) %>% 
    ungroup()
DF_calls <- DF %>% #Change name of calls/callers for baseline
  mutate(Baseline = calls)
list_DF <- list(DF_calls, DF_callers) #Make a list so we can apply same process to both dataframes at once
list_DF <- lapply(list_DF, function(x) #Create age ranges and count by age ranges
  x <- x %>% 
    mutate(age_range = ifelse(EDAD < 18 , "<18", 
                              ifelse(EDAD >= 18 & EDAD <=24, "18-24",
                                     ifelse(EDAD >=25 & EDAD <= 39, "25-39",
                                            ifelse(EDAD >=40 & EDAD <= 54, "40-54", 
                                                   ifelse(EDAD >=55 & EDAD <= 64, "55-64",
                                                          ifelse(EDAD >= 65, "65+", NA))))))) %>% 
    group_by(fecha_llamada, age_range) %>% 
    mutate(count_age = n()) %>% 
    ungroup())
list_DF <- lapply(list_DF, function(x) #Count by date and specialty
  x <- x %>% 
    group_by(fecha_llamada, Specialty) %>% 
    mutate(count_spec = n()) %>% 
    ungroup())
list_DF <- lapply(list_DF, function(x) #Count by date and gender
  x <- x %>% 
    group_by(fecha_llamada, GENERO) %>% 
    mutate(count_gender = n()) %>% 
    ungroup()
  )
list_DF <- lapply(list_DF, function(x) #Count by date and prescription(yes/no)
  x <- x %>% 
  group_by(fecha_llamada, RECETA) %>% 
  mutate(count_pres = n()) %>% 
  ungroup())
list_DF <- lapply(list_DF, function(x) #Count by date and pre existing condition(yes/no)
  x <- x %>% 
  group_by(fecha_llamada, pre_existing) %>% 
  mutate(count_condition = n()) %>% 
  ungroup())
list_DF <- lapply(list_DF, function(x) #If there is no data for pre_existing leave count as NA 
  x <- x %>%  
  mutate(count_condition = ifelse(is.na(pre_existing) == TRUE, NA, count_condition)))

list_DF <- lapply(list_DF, function(x) #Keep unique values
  x <- x %>% 
  distinct(fecha_llamada, Baseline, Specialty, count_spec, GENERO,count_gender, 
           RECETA, count_pres,pre_existing, count_condition, age_range, count_age, .keep_all = TRUE))
####################################################################
#Step 2) Generate columns by subgroup with the value of the count 
####################################################################
list_DF <- lapply(list_DF, function(x) #For medical specialty
  x <- x %>% 
    tidyr::spread(Specialty, count_spec)) #Generate column by category
list_DF <- lapply(list_DF, na.zero) #Change NA's to 0 (same previous reasoning)
list_DF <- lapply(list_DF, function(x)
  x <- x %>% 
    group_by(fecha_llamada) %>% 
    mutate(`General Medicine` = max(`General Medicine`), 
           Pediatrics = max(Pediatrics),
           `Ob/Gyn` = max(`Ob/Gyn`)))
list_DF <- lapply(list_DF, function(x) #Keep one observation per day and category
  x <- x %>% 
  distinct(fecha_llamada, Baseline, count_gender, count_pres, count_condition, count_age, .keep_all = TRUE))
list_DF <- lapply(list_DF, function(x) 
  x <- x %>% 
    tidyr::spread(age_range, count_age)) #For age range
list_DF <- lapply(list_DF, na.zero) 
list_DF <- lapply(list_DF, function(x)
x <- x %>% 
  group_by(fecha_llamada) %>% 
  mutate(`<18` = max(`<18`),
         `18-24` = max(`18-24`),
         `25-39` = max(`25-39`),
         `40-54` = max(`40-54`),
         `55-64` = max(`55-64`),
         `65+`= max(`65+`)))
list_DF <- lapply(list_DF, function(x)
x <- x %>% 
  distinct(fecha_llamada, Baseline, count_gender, count_pres, count_condition, .keep_all = TRUE))
list_DF <- lapply(list_DF, function(x) #For gender
  x <- x %>% 
  mutate(GENERO = ifelse(GENERO == "F", "Female",
                         ifelse(GENERO == "M", "Male", GENERO)))) #For gender
list_DF <- lapply(list_DF, function(x) 
  x <- x %>% 
    tidyr::spread(GENERO, count_gender))
list_DF <- lapply(list_DF, na.zero) 
list_DF <- lapply(list_DF, function(x) 
  x <- x %>% 
    group_by(fecha_llamada) %>% 
    mutate(Female = max(Female),
           Male = max(Male)))
list_DF <- lapply(list_DF, function(x) 
x <- x %>% 
  distinct(fecha_llamada, Baseline, count_pres, count_condition, .keep_all = TRUE))
list_DF <- lapply(list_DF, function(x) #For prescription
  x<- x %>% #Change the name of the si/no to prescription/no prescription
  mutate(RECETA = ifelse(RECETA == "Si", "Prescription", "No Prescription")))
list_DF <- lapply(list_DF, function(x) 
  x <- x %>% 
    tidyr::spread(RECETA, count_pres))
list_DF <- lapply(list_DF, na.zero) 
list_DF <- lapply(list_DF, function(x) 
  x <- x %>% 
  group_by(fecha_llamada) %>% 
  mutate(`Prescription` = max(`Prescription`),
         `No Prescription` = max(`No Prescription`)))
list_DF <- lapply(list_DF, function(x) 
x <- x %>% 
  distinct(fecha_llamada, Baseline, count_condition, .keep_all = TRUE))
list_DF <- lapply(list_DF, function(x) #For diagnosis
  x <- x %>% 
    mutate(pre_existing = ifelse(pre_existing == 1, "Preexist. Condit.", 
                                 ifelse(pre_existing == 0, "No Diagnosis", NA))))
list_DF <- lapply(list_DF, function(x)
x <- x %>% 
  tidyr::spread(pre_existing, count_condition)) 
list_DF <- lapply(list_DF, na.zero) 
list_DF <- lapply(list_DF, function(x)
  x <- x %>% 
  group_by(fecha_llamada) %>% 
  mutate(`Preexist. Condit.` = max(`Preexist. Condit.`),
         `No Diagnosis` = max(`No Diagnosis`)))
list_DF <- lapply(list_DF, function(x)
x <- x %>% 
  distinct(fecha_llamada, Baseline, .keep_all = TRUE))
#Since we don't complete data set for diagnosis keep 0 as NA 
#(This is because subsequent actualization of the data didn't contain this variable)
list_DF <- lapply(list_DF, function(x)
  x <- x %>% 
  mutate(`Preexist. Condit.` = ifelse(`Preexist. Condit.` == 0, NA, `Preexist. Condit.`), 
         `No Diagnosis` = ifelse(`No Diagnosis` == 0, NA, `No Diagnosis`)))
####################################################################
#Step 3) Generate Regression variables and run regressions
####################################################################
list_DF <- lapply(list_DF, function(x) #generate regression varaibles 
  x <- x %>% 
    mutate(Post = ifelse(Week >= 11, 1,0), #define treatment period
           Year2020 = ifelse(año_llamada == "2020", 1,0), #define treatment year
           PostxYear= Post*Year2020)) #Interaction term 
depvar <- c("Baseline", "General Medicine", "Pediatrics", "Ob/Gyn", "<18", "18-24","25-39", "40-54", "55-64",
            '65+', "Female", "Male", "Preexist. Condit.") #define dependent variables
#-----For Calls-------
DF_H <- list_DF[[1]]  #Take call dataframe from list 
DF_H <- DF_H %>% #filter weeks with no observations
  filter(!Week %in% c(6,7,8,17))
regresults <- lapply(depvar, function(dv) { #Loop over dependent variables 
  Hetero_mod2 <- lm_robust(log(get(dv)+1) ~ PostxYear + factor(weekday_llamada) + factor(año_llamada) + factor(Week), data = DF_H,
                           se_type = "HC1")
  Coeff_new <- head(broom::tidy(Hetero_mod2, conf.int = TRUE), 2) #Constant and coefficient for PostxYear term 
  Coeff_new <- Coeff_new %>%  #Keep coefficient for PostxYear term only
    filter(term == "PostxYear")
})
allresults <- data.frame(depvar = depvar, #Contain all results from loop in a single dataframe
                         do.call(rbind, regresults))
allresults <- allresults %>% #rename depvar to term for graphing purposes
  subset(select = -c(term)) %>% 
  rename(term = (depvar))
allresults <- allresults %>% #Re-arrange order of terms to group by category
  mutate(term = factor(term, levels = c("Baseline", "General Medicine", "Ob/Gyn", "Pediatrics", 
                            "<18",  "18-24", "25-39", "40-54", "55-64", "65+", "Male", "Female",
                            "Preexist. Condit.")))
#-----For Callers-------
DF_Callers_H <- list_DF[[2]] #Take caller dataframe from list
DF_Callers_H <- DF_Callers_H %>% 
  filter(!Week %in% c(6,7,8,17))
regresults_f <- lapply(depvar, function(dv) {
  Hetero_mod2_f <- lm_robust(logplus(get(dv)) ~ PostxYear + factor(weekday_llamada) + factor(año_llamada) + factor(Week), data = DF_Callers_H,
                             se_type = "HC1")
  Coeff_new <- head(broom::tidy(Hetero_mod2_f, conf.int = TRUE), 2)
  Coeff_new <- Coeff_new %>% 
    filter(term == "PostxYear")
})

allresults_f <- data.frame(depvar = depvar, 
                           do.call(rbind, regresults_f))
allresults_f <- allresults_f %>% 
  subset(select = -c(term)) %>% 
  rename(term = (depvar))
allresults_f <- allresults_f %>% 
  mutate(term= as.character(term))
allresults_f <- allresults_f %>% 
  mutate(term = factor(term, levels = c("Baseline", "General Medicine", "Ob/Gyn", "Pediatrics", 
                            "<18",  "18-24", "25-39", "40-54", "55-64", "65+", "Male", "Female", "Preexist. Condit."))) 
####################################################################
#Step 4) Graphing coefficients and CI 
####################################################################
pdf("Fig3_calls.pdf")
Hetero_graph <- ggcoef(allresults, color = "dodgerblue3", size = 2, errorbar_color = "dodgerblue3", 
                       vline_color = "grey1", vline_intercept = allresults$estimate[1]) + 
  geom_hline(yintercept = c(1.5, 4.5, 10.5), linetype="dashed", color = "grey")+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"),
        axis.text.x = element_text(angle = 90, size = 14),
        axis.text.y = element_text(size = 14)) +
  scale_x_continuous(limits = c(0, 5)) +  xlab("") +
  ylab("") +
  ggtitle("") + 
  theme(plot.title = element_text(hjust = 0.5)) + 
  coord_flip()
plot(Hetero_graph)
dev.off()
pdf("Fig3_callers.pdf")
Hetero_graph_f <- ggcoef(allresults_f, color = "dodgerblue3", size = 2, errorbar_color = "dodgerblue3", 
                         vline_color = "grey1", vline_intercept = allresults_f$estimate[1])+ 
  geom_hline(yintercept = c(1.5, 4.5, 10.5), linetype="dashed", color = "grey")+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"),
        axis.text.x = element_text(angle = 90, size = 14), 
        axis.text.y = element_text(size = 14)) +
  scale_x_continuous(limits = c(0, 3.5)) +  xlab("") + 
  ylab("") +
  ggtitle("") + 
  theme(plot.title = element_text(hjust = 0.5)) + 
  coord_flip()
plot(Hetero_graph_f)
dev.off()
rm(allresults, allresults_f, DF_callers, DF_Callers_H, DF_calls, DF_H, list_DF, regresults, regresults_f, depvar)
###################################################################################
#######  #Table 3:  Demand for Telemedicine and Spatial Mobility           ######
###################################################################################
####################################################################
#Step 1) Run Event study regressions for call results & store coefficients 
####################################################################
DF_working <- within(DF_working, weekxyear <- relevel(factor(weekxyear), ref = "5")) #Use week 5 as reference
df2 <- DF_working %>% #Filter for weeks without data
  filter(!Week %in% c(6,7,8,17))
for(i in 1:length(depvar2_log)){ #Loop regressions over dependent variables
  model <- paste("ES", i, sep="")
  m <- lm_robust(as.formula(paste(depvar2_log[i], "~ weekxyear + factor(Week) + weekday_llamada")), data = df2, se_type = "HC1")
  dt <- head(broom::tidy(m, conf.int = TRUE), 50) #Store coefficients
  dt$Number <- as.numeric(parse_number(dt$term)) #Change term to number
  dt$term <- dt$Number # Keep only weekxyear 1:53 (excluding weeks 6,7,8,17)
  dt <- dt %>% 
    filter(term != 0)
  assign(model, dt)} #Assign names to the dataframes resulting from loop 
list_DF <- list(ES1, ES2, ES3, ES4) #Make the 4 results data frame a list 
list_DF <- lapply(list_DF, function(x)  #Change term number week 
  x <- x %>% 
    rename(Week = Number))
ES1 <- list_DF[[1]] #Event study for Resolutions
ES2 <- list_DF[[2]] #Event study for Prescriptions
ES3 <- list_DF[[3]] #Event study for Follow-Up
ES4 <- list_DF[[4]] #Event study for Referral
list_DF <- list(tbl, zbl) #Make list with even study main results
####################################################################
#Step2) Merge mobility data 
####################################################################
list_DF <- lapply(list_DF, function(x) #Merge to mobility data 
  x <- merge(Mobility, x, by= "Week"))
list_DF <- lapply(list_DF, function(x) 
  x <- x[!duplicated(x$Week), ]) #Keep one observation per week 
list_DF_post <- lapply(list_DF, function(x) #Generate two more data sets containing only Post period
  x <- x %>% 
    mutate(Post = ifelse(Week >=11,1,0)) %>% 
    filter(Post == 1))
#^^ Mobility data is shown daily, but average_mob is defined as the weekly simple average of all 3 mobility indicators. 
#Step3) Run regressions & output results
mob1 <- lm(estimate ~ average_mob, data = list_DF[[1]]) #Regression on All data for calls 
mob2 <- lm(estimate ~ average_mob, data = list_DF_post[[1]]) #Regression on Post data for calls 
mobz1 <- lm(estimate ~ average_mob, data = list_DF[[2]]) #Regression on All data for callers 
mobz2 <- lm(estimate ~ average_mob, data = list_DF_post[[2]]) #Regression on Post data for callers 
models <- c("mob1", "mobz1", "mob2" ,"mobz2")  #Define models to print 
for(i in 1:length(models)){ #Calculate robust standard errors for models 
  name <- paste("robust.se", i,sep = "")
  cov <- vcovHC(get(models[i]), type = "HC")
  robust.se <- sqrt(diag(cov))
  assign(name, robust.se )}
stargazer(mob1, mobz1, mob2, mobz2, #Output
          se = list(robust.se1, robust.se2,robust.se3,robust.se4),
          column.labels = c("beta{Calls}", "beta{Callers}", "beta{Calls}", "beta{Callers}"), 
          type= "latex", dep.var.labels.include = FALSE, model.names = TRUE, model.numbers =FALSE, float = FALSE,
          keep = c("average_mob"), dep.var.caption  = "", style = "jpam", omit.stat = c("f", "rsq", "ser"),
          out = "Tab3_Mobility.tex")

###################################################################################
#######  #Figure Appendix A1.          ######
###################################################################################
####################################################################
#Step1) Partition the graph so it shows discontinuities
####################################################################
DF_working <- DF_working %>% # For Calls
  mutate(pre_5 = ifelse(fecha_llamada < as.Date("2019-01-31"), calls, NA),
         post_8 = ifelse(fecha_llamada > as.Date("2019-03-10") & fecha_llamada <= as.Date("2019-12-31"), calls,NA),
         pre_17 = ifelse(fecha_llamada >= as.Date("2020-01-01") & fecha_llamada < as.Date("2020-04-17"),calls, NA), 
         post_17 = ifelse(fecha_llamada >= as.Date("2020-05-01"), calls, NA))
#^^ This is done to avoid connecting missing data with the time series line
####################################################################
#Step2) Graph
####################################################################
#-----For Calls-------
pdf("AppFig1_Calls.pdf.pdf")
calls <- DF_working %>%
  rename(Year = año_llamada) %>% 
  ggplot(aes(x= fecha_llamada)) + 
  geom_line(aes(y = log(pre_5), col = "grey50")) + 
  geom_line(aes(y = log(post_8), col = "grey50")) + 
  geom_line(aes(y = log(pre_17), col = "skyblue")) + 
  geom_line(aes(y = log(post_17), col = "skyblue")) + 
  scale_colour_manual(values = c("grey50", "skyblue")) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black")) +
  geom_vline(xintercept = 11, linetype="dotted", color = "red", size=0.75) + 
  #scale_y_continuous(limits = c(0, 2400)) +
  scale_x_date(expand = c(0, 0), date_breaks =  "1 month",date_labels = "%b-%Y") +
  xlab("") +
  ylab("log(Daily Number of Calls)") +
  theme(legend.position="none") +
  theme(axis.text.x = element_text(angle = 90))
plot(calls)
dev.off()
#-----For Callers-------
DF_working <- DF_working %>% 
  mutate(pre_5 = ifelse(fecha_llamada < as.Date("2019-01-31"), callers, NA),
         post_8 = ifelse(fecha_llamada > as.Date("2019-03-10") & fecha_llamada <= as.Date("2019-12-31"), callers,NA),
         pre_17 = ifelse(fecha_llamada >= as.Date("2020-01-01") & fecha_llamada < as.Date("2020-04-17"),callers, NA), 
         post_17 = ifelse(fecha_llamada >= as.Date("2020-05-01"), callers, NA))
pdf("AppFig1_Callers.pdf") 
callers <- DF_working %>%
  rename(Year = año_llamada) %>% 
  ggplot(aes(x= fecha_llamada)) + 
  geom_line(aes(y = log(pre_5), col = "grey50")) + 
  geom_line(aes(y = log(post_8), col = "grey50")) + 
  geom_line(aes(y = log(pre_17), col = "skyblue")) + 
  geom_line(aes(y = log(post_17), col = "skyblue")) + 
  scale_colour_manual(values = c("grey50", "skyblue")) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black")) +
  #scale_y_continuous(limits = c(0, 1200)) +
  scale_x_date(expand = c(0, 0), date_breaks =  "1 month",date_labels = "%b-%Y") +
  xlab("") +
  ylab("log(Daily number of Callers)") +
  theme(legend.position="none") +
  theme(axis.text.x = element_text(angle = 90))
plot(callers)
dev.off()
#############################################################################################
#######  #Figure Appendix A.3:  Treatment Effects:  Event-study Analysis Client Fixed Effects 
###################################################################################
####################################################################
#Step 1) Trim the database to the 7 biggest clients 
####################################################################
id_clients_90 <- c("1", "4", "28", "51","3", "20","49") #Define the 7 biggest clients 
DF_clients <- DF %>% #Filter dataframe to only those clients
  filter(ID_CLIENT %in% id_clients_90)
##^^ A previous analysis showed that these 7 clients represent 90\% of the total number 
##of observations in our data frame. The total number of unique clients is 94 
DF_clients <- DF_clients[order(DF_clients$fecha_llamada),]
DF_callers_clients <- DF_clients[!duplicated(DF_clients$ID_PATIENT), ]
####################################################################
#Step 2) Generate outcome variables 
####################################################################
#-----For Calls-------
DF_clients <- DF_clients %>%
  group_by(ID_CLIENT, fecha_llamada) %>% 
  mutate(count = n()) %>%  #Count calls per day and ID_CLIENT
  ungroup()
DF_clients <- DF_clients %>% #Keep only regression variables
  select(count, fecha_llamada, ID_CLIENT, año_llamada, Week, weekday_llamada, weekday_llamada) 
DF_clients <- unique(DF_clients) #Keep one observation per day per client 
DF_clients <- tidyr::spread(DF_clients, ID_CLIENT, count) 
DF_clients <- na.zero(DF_clients) #Change NA to 0 (same reasoning as above)
DF_clients <- tidyr::gather(DF_clients, ID_CLIENT, calls, `1`:`51`, 
                            factor_key = TRUE) #Generate one column for client containing count per client
DF_clients <- DF_clients %>%  #Regression varaibles 
  mutate(Post = ifelse(Week >=11,1,0), #Define treatment period
         Treat_year = ifelse(año_llamada>2019,1,0), #Define treatment year
         PostxYear2020 = Post*Treat_year) #Interaction term 
DF_clients <- DF_clients %>% #For Event Study create a dummy for week_txyear2020 
  mutate(WeekxYear = Week* Treat_year, #Interaction term 
         weekxyear = as.character(WeekxYear))
DF_clients <- within(DF_clients, weekxyear <- relevel(factor(weekxyear), ref = "5")) #Use week 5 as reference week
#-----For Callers-------
DF_callers_clients <- DF_callers_clients %>%
  group_by(ID_CLIENT, fecha_llamada) %>% #Count first time callers per day and ID_CLIENT
  mutate(count = n()) %>% 
  ungroup() 
DF_callers_clients <- DF_callers_clients %>% 
  select(count, fecha_llamada, ID_CLIENT, año_llamada, Week, weekday_llamada, weekday_llamada) 
DF_callers_clients <- unique(DF_callers_clients) 
DF_callers_clients <- tidyr::spread(DF_callers_clients, ID_CLIENT, count) 
DF_callers_clients <- na.zero(DF_callers_clients)
DF_callers_clients <- tidyr::gather(DF_callers_clients, ID_CLIENT, calls, `1`:`51`, 
                            factor_key = TRUE) 
DF_callers_clients <- DF_callers_clients %>%  
  mutate(Post = ifelse(Week >=11,1,0),
         Treat_year = ifelse(año_llamada>2019,1,0),
         PostxYear2020 = Post*Treat_year)
DF_callers_clients <- DF_callers_clients %>% 
  mutate(WeekxYear = Week* Treat_year,
         weekxyear = as.character(WeekxYear))
DF_callers_clients <- within(DF_callers_clients, weekxyear <- relevel(factor(weekxyear), ref = "5"))
####################################################################
#Step 3) Run regressions and store coefficients
####################################################################
df_2 <- DF_clients %>%  #Filter for weeks we don't have data 
  filter(!Week %in% c(6,7,8,17))
df_2_callers <- DF_callers_clients %>% 
  filter(!Week %in% c(6,7,8,17))
#-----For Calls-------
Call_ES_client<- lm_robust(log(calls+1) ~ weekxyear + factor(Week) + weekday_llamada + factor(ID_CLIENT), data = df_2, se_type = "HC1") #ES
tblc <- head(broom::tidy(Call_ES_client, conf.int = TRUE), 50) #Saving coefficients in a dataframe
tblc$Number <- as.numeric(parse_number(tblc$term)) #Changing terms to merge with mob data 
tblc$term <- tblc$Number
tblc <- tblc %>% #Keep only coeffs for weekxyear 1:53 (excluding weeks 6,7,8,17)
  filter(term != 0)
tblc <- tblc %>% #Merge with mobility data for graphs
  rename(Week = Number)
merge_tc_week <- merge(Mob, tblc, by= "Week", all = TRUE) #Merge to mob data 
#-----For Callers-------
Caller_ES_client<- lm_robust(log(calls+1) ~ weekxyear + factor(Week) + weekday_llamada + factor(ID_CLIENT), data = df_2_callers, se_type = "HC1")
zblc <- head(broom::tidy(Caller_ES_client, conf.int = TRUE), 50)
zblc$Number <- as.numeric(parse_number(zblc$term))
zblc$term <- zblc$Number
zblc <- zblc %>% 
  filter(term != 0)
zblc <- zblc %>% #Merge with mobility data for graphs
  rename(Week = Number)
merge_zc_week <- merge(Mob, zblc, by= "Week", all = TRUE)
####################################################################
#Step 4) Plot coefficients
####################################################################
coeff <-25 #Define relation ratio for mobility data and event study coefficients (for graphing purposes)
pdf("AppFig2_calls.pdf") #Output
ggplot(merge_tc_week, aes(x= Week, y = estimate)) +
  scale_y_continuous(name= "Point Estimate Values",
                     sec.axis = sec_axis( trans=~.*25, name="Trends in Mobility")) +
  theme_ipsum()

testimate <- ggplot(merge_tc_week, aes(x=Week, y =estimate)) +
  geom_point()+
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high), color = estimate_col) +
  geom_line(aes(y=average_mob/coeff), color = average_col)+
  scale_y_continuous(
    name="Point Estimate for log(Calls)",
    sec.axis = sec_axis(~.*coeff,name = "Mobility Indicator", labels = function(x) paste0(x, "%"))) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"))+
  scale_x_continuous(expand = c(0.01,0),breaks =breaks) +
  geom_vline(xintercept=11, linetype="dashed", 
             color = "grey", size=0.5) +
  geom_hline(yintercept = 0, linetype = "dashed", 
             color = "red", size = 0.5) + 
  theme(axis.text.x = element_text(angle = 90))
plot(testimate)
dev.off()
pdf("AppFig2_callers.pdf") #Output
ggplot(merge_zc_week, aes(x= Week, y = estimate)) +
  scale_y_continuous(name= "Point Estimate Values",
                     sec.axis = sec_axis( trans=~.*25, name="Trends in Mobility")) +
  theme_ipsum()
zestimate <- ggplot(merge_zc_week, aes(x=Week, y =estimate)) +
  geom_point()+
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high), color = estimate_col) +
  geom_line(aes(y=average_mob/coeff), color = average_col)+
  scale_y_continuous(
    name="Point Estimate for log(First Time Callers)",
    sec.axis = sec_axis(~.*coeff,name = "Mobility Indicator", labels = function(x) paste0(x, "%"))) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"))+
  scale_x_continuous(expand = c(0.01,0),breaks =breaks) +
  geom_vline(xintercept=11, linetype="dashed", 
             color = "grey", size=0.5) +
  geom_hline(yintercept = 0, linetype = "dashed", 
             color = "red", size = 0.5) + 
  theme(axis.text.x = element_text(angle = 90))
plot(zestimate)
dev.off()
####################################################################
#Figure Appendix A.4: In-person and Telemedicine Consultations 2019-2020 
#Data from a Large Health Insurance Company
####################################################################
####################################################################
#Step 1) Prepare data for graph 
####################################################################
provider_data <- provider_data %>% #Generate year, and month-day variable from date
  mutate(year = format(fecha_llamada, format ="%Y"),
         month_day = format(fecha_llamada, format = "%m-%d"))
##^^ The above process generates a "long" table with a two observations per month-day and a column indicating the year 
provider_data1 <- provider_data %>% #Data with month-day and in-person consultations
  select(-c("fecha_llamada", "tele"))
provider_data1<- tidyr::spread(provider_data1,year,in_person) #Generate two column (2019 and 2020) with count per day 
provider_data1 <- provider_data1 %>% #Change name of columns 
  rename(inperson_2019 = `2019`, inperson_2020 = `2020`)
provider_data2 <- provider_data %>% #Data with month-day and tele consultations
  select(-c("fecha_llamada", "in_person"))
provider_data2<- tidyr::spread(provider_data2,year,tele) #Generate two column (2019 and 2020) with count per day
provider_data2 <- provider_data2 %>%  #Change name of columns 
  rename(tele_2019 = `2019`, tele_2020 = `2020`)
provider_data <- merge(provider_data1, provider_data2, by = "month_day", all = TRUE) #merge in-person an tele data
rm(provider_data1,provider_data2) #Clean environment 
provider_data<- na.zero(provider_data) #NA's mean count = 0
provider_data <- provider_data %>%  #Since 2019 contains a 02-29 filter it
  filter(!month_day == "02-29") %>% 
  mutate(year= 2020, #Generate 2020 dates from mont_day
         fecha = paste0("2020-",month_day,sep=""),
         fecha = as.Date(fecha),
         week= lubridate::week(ymd(fecha)))
#^^ Since we will calculate the weekly percent change from 2019 to 2020, we use only 2020 to graph data
provider_data <- provider_data %>% #Count weekly number of consultations by year and in-person/tele
  group_by(week) %>% 
  mutate(weekly_ip19 = sum(inperson_2019, na.rm = TRUE),
         weekly_ip20 = sum(inperson_2020, na.rm = TRUE),
         weekly_tele19 = sum(tele_2019, na.rm = TRUE),
         weekly_tele20 = sum(tele_2020, na.rm = TRUE)) %>% 
  ungroup() %>% 
  mutate(change_ip = weekly_ip20/(weekly_ip19 + weekly_tele19), #Calculate ratio for consultations_2020/consultations_2019
         change_tele = weekly_tele20/(weekly_ip19 + weekly_tele19))
provider_data <- provider_data %>% #Keep one observation per week
  distinct(week, .keep_all = TRUE) 
####################################################################
#step 2) Calculate Week rolling average 
####################################################################
rolling_ip <-c() #Define empty vector to store observations 
for (i in 1:53){ #For every week: 
  ma <- provider_data %>% #Filter that week and the two conseq weeks
    filter(week >= i & week <= i+2) 
  mean_ip<- mean(ma$change_ip) #calculate average
  week <- i #Keep track of the week 
  MA <- data.frame(mean_ip, week)
  rolling_ip <- rbind(rolling_ip, MA) #store coefficient
}
rm(i,mean_ip,week,ma,MA) #clean environment 
provider_data <- merge(provider_data, rolling_ip, by = "week") #Merge to main data 
rolling_tele <-c()
for (i in 1:53){
  ma <- provider_data %>% 
    filter(week >= i & week <= i+2) 
  mean_tele<- mean(ma$change_tele)
  week <- i
  MA <- data.frame(mean_tele, week)
  rolling_tele <- rbind(rolling_tele, MA)
}
rm(i,mean_tele,week,ma,MA)
provider_data <- merge(provider_data, rolling_tele, by = "week") #Merge to main data 
####################################################################
#Step 3) Graph 
####################################################################
pdf("AppFig4_Provider.pdf.pdf") 
provider <- ggplot(provider_data, aes(x=fecha)) +
  geom_line(aes(y=mean_ip), col="dodgerblue4") +
  geom_line(aes(y=mean_tele * 10), col="#008B8B", linetype = "dashed") +
  scale_y_continuous(name="In-Person", sec.axis=sec_axis(~./10, 
                                                         name="Teleconsultations",labels = percent), labels = percent) +
  geom_hline(yintercept = 1, linetype = "dashed", col = "grey")+
  scale_x_date(breaks = "1 month", expand= c(0.01,0), date_labels = "%b-%Y") + 
  theme(axis.title.y.left=element_text(color="dodgerblue4"),
        axis.text.y.left=element_text(color="dodgerblue4"),
        axis.title.y.right=element_text(color="#008B8B"),
        axis.text.y.right=element_text(color="#008B8B")) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"))+
  theme(axis.text.x = element_text(angle = 90)) +
  xlab("") 
plot(provider)
dev.off()
rm(rolling_ip,rolling_tele)
####################################################################
#Table Appendix A.1:  Test of Parallel Trends (AppTab1_paralleltest)
####################################################################
####################################################################
#Step1) Run regressions on Main results 
####################################################################
df_2 <- DF_working %>% #Filter for weeks we don't have data 
  filter(!Week %in% c(6,7,8,17))
Call_ES<- lm_robust(log(calls) ~ weekxyear + factor(Week) + weekday_llamada, data = df_2, se_type = "HC1") #regression for calls 
Caller_ES<- lm_robust(log(callers) ~ weekxyear + factor(Week) + weekday_llamada, data = df_2, se_type = "HC1") #regression for callers
####################################################################
#Step2) Run hypothesis test on coefficients and store results 
####################################################################
#-----For Calls-------
#Test on week1=week2=week3=week4=0, using F-test 
p1 <- linearHypothesis(Call_ES, c("weekxyear1=weekxyear2", "weekxyear1=weekxyear3",  "weekxyear1=weekxyear4", "weekxyear1=0"),singular.ok = TRUE,test = "F")
p1 <- p1$`Pr(>F)`[2] #Storing P-value  
#Test on Week9=Week10=0
p2<- linearHypothesis(Call_ES, c("weekxyear9=weekxyear10", "weekxyear10=0"), singular.ok = TRUE, test = "F")
p2 <- p2$`Pr(>F)`[2] #Storing P-value 
#-----For Callers-------
p3<- linearHypothesis(Caller_ES, c("weekxyear1=weekxyear2", "weekxyear1=weekxyear3", "weekxyear1=weekxyear4", "weekxyear1=0"), singular.ok = TRUE,test = "F")
p3 <- p3$`Pr(>F)`[2]
p4 <- linearHypothesis(Caller_ES, c("weekxyear9=weekxyear10","weekxyear10=0" ), singular.ok = TRUE, test = "F")
p4 <- p4$`Pr(>F)`[2]
####################################################################
#Step1) Run regressions on call resolution 
####################################################################
for(i in 1:length(depvar2_log)){
  model <- paste("reg", i, sep="")
  m <- lm_robust(as.formula(paste(depvar2_log[i], "~ weekxyear + factor(Week) + weekday_llamada")), data = df_2, se_type = "HC1")
  assign(model, m)}
####################################################################
#Step2) Run hypothesis test on coefficients and store results 
####################################################################
#-----For Resolved-------
p5 <- linearHypothesis(reg1, c("weekxyear1=weekxyear2", "weekxyear1=weekxyear3", "weekxyear1=weekxyear4", "weekxyear1=0"), singular.ok = TRUE, test ="F")
p5 <- p5$`Pr(>F)`[2]
p6 <- linearHypothesis(reg1, c("weekxyear9=weekxyear10","weekxyear10=0" ), singular.ok = TRUE, test = "F")
p6 <- p6$`Pr(>F)`[2]
#-----For Prescription-------
p7 <- linearHypothesis(reg2, c("weekxyear1=weekxyear2", "weekxyear1=weekxyear3", "weekxyear1=weekxyear4", "weekxyear1=0"), singular.ok = TRUE,test = "F")
p7 <- p7$`Pr(>F)`[2]
p8 <-linearHypothesis(reg2, c("weekxyear9=weekxyear10","weekxyear10=0" ), singular.ok = TRUE, test = "F")
p8 <- p8$`Pr(>F)`[2]
#-----For Follow-Up-------
p9 <- linearHypothesis(reg3, c("weekxyear1=weekxyear2", "weekxyear1=weekxyear3", "weekxyear1=weekxyear4", "weekxyear1=0"),singular.ok = TRUE,test = "F")
p9 <- p9$`Pr(>F)`[2]
p10 <- linearHypothesis(reg3, c("weekxyear9=weekxyear10","weekxyear10=0" ), singular.ok = TRUE, test = "F")
p10 <- p10$`Pr(>F)`[2]
#-----For Referral ------
p11 <- linearHypothesis(reg4, c("weekxyear1=weekxyear2", "weekxyear1=weekxyear3", "weekxyear1=weekxyear4", "weekxyear1=0"), singular.ok = TRUE,test = "F")
p11 <- p11$`Pr(>F)`[2]
p12 <- linearHypothesis(reg4, c("weekxyear9=weekxyear10","weekxyear10=0" ), singular.ok = TRUE, test = "F")
p12 <- p12$`Pr(>F)`[2]

`week1:week4`<- c(round(p1,5) , round(p3,4),round(p5,4),round(p7,4),round(p9,4),round(p11,4)) #Storing results in table 
`week9:week10`<- c(round(p2,4), round(p4,4),round(p6,4),round(p8,4),round(p10,4),round(p12,4))
hypo <- as.data.frame(rbind(`week1:week4`, `week9:week10`))
hypo <- hypo %>% #Labeling the colums of the table
  rename(Calls = V1, Callers = V2, Resolved = V3, Prescription = V4, `Follow-up` = V5, Referral = V6)
hypo <- setDT(hypo, keep.rownames = TRUE)[]
print(xtable(format(hypo,digits =2)),type="latex", floating = FALSE, include.rownames = FALSE,file="AppTab1_paralleltest.tex") #Output
####################################################################
#Table Appendix A.2:  Demand for Telemedicine and Spatial Mobility:  
#Other Outcomes
####################################################################
####################################################################
#Step2) Merge mobility data 
####################################################################
list_DF <- list(ES1, ES2, ES3, ES4) #Generate a list with the call outcome event study df
list_DF <- lapply(list_DF, function(x)  #Merge with mobility data 
  x <- merge(Mobility, x, by= "Week"))
list_DF <- lapply(list_DF, function(x) #Generate Post variable 
  x <- x %>% 
    mutate(Post = ifelse(Week >= 11, 1,0)))
list_DF <- lapply(list_DF, function(x) #Keep one observation per week 
  x <- x[!duplicated(x$Week), ])
list_DF_post <- lapply(list_DF, function(x) #Generate df with Post data only 
  x <- x %>% 
    filter(Post == 1))
####################################################################
#Step3) Run regressions & output results
####################################################################
mobr1 <- lm(estimate ~ average_mob, data = list_DF[[1]]) #For Resolved All Data 
mobr2 <- lm(estimate ~ average_mob, data = list_DF_post[[1]]) #For Resolved Post Data 
mobp1 <- lm(estimate ~ average_mob, data = list_DF[[2]]) #For Prescription All Data 
mobp2 <- lm(estimate ~ average_mob, data = list_DF_post[[2]]) #For Prescription Post Data 
mobf1 <- lm(estimate ~ average_mob, data = list_DF[[3]]) #For Follow-up All Data 
mobf2 <- lm(estimate ~ average_mob, data = list_DF_post[[3]])#For Follow-up Post Data 
mobref1 <- lm(estimate ~ average_mob, data = list_DF[[4]])#For Referral All Data 
mobref2 <- lm(estimate ~ average_mob, data = list_DF_post[[4]])#For Referral Post Data 
models <- c("mobr1", "mobr2", "mobp1" ,"mobp2", "mobf1", "mobf2","mobref1", "mobref2")  #Define models to print
star.out.1 <- stargazer(mobr1, mobp1, mobf1, mobref1, keep.stat = "n", keep = "average_mob", float = FALSE, header = FALSE,
                        column.labels = c("$beta_{Resolved}$", "$beta_{Prescription}$", "$beta_{Follow-up}$", "$beta_{Referral}$"),
                        dep.var.labels.include = FALSE)
star.out.2 <- stargazer(mobr2, mobp2, mobf2, mobref2, keep.stat = "n", keep = "average_mob", float = FALSE, header = FALSE,
                        column.labels = c("$beta_{Resolved}$", "$beta_{Prescription}$", "$beta_{Follow-up}$", "$beta_{Referral}$"),
                        dep.var.labels.include = FALSE)

##stargazer panel -- same summary statistics across panels.
star.panel.out <- starpolishr::star_panel(star.out.1, star.out.2, panel.names = c("All 2020", "2020 Post Week 11"))
write(star.panel.out,file="AppTab2_mobility.tex") #Output
####################################################################
#Table Appendix A.3:  Health Insurance Provider’s Characteristics
####################################################################
####################################################################
#Step1) Generate variables by category 49 and rest of the sample
###################################################################
DF <- DF %>% #Generate variable to indicate what calls were received for client 49 
  mutate(Provider=ifelse(ID_CLIENT == "49",1,0))
DF_provider <- DF %>% #Generate descriptive stats by client49 and rest of the sample 
  group_by(Provider) %>% 
  mutate(resolved = ifelse(status == "Resolved",1,0),
         prescription = ifelse(RECETA == "Si",1,0), 
         follow_up = ifelse(status == "Follow-up",1,0),
         referral = ifelse(status == "Referral",1,0),
         general_medicine = ifelse(Specialty == "General Medicine",1,0),
         ob_gyn = ifelse(Specialty == "Ob/Gyn", 1,0),
         pediatrics = ifelse(Specialty == "Pediatrics",1,0),
         male = ifelse(GENERO == "M",1,0))
tab_provider <- DF_provider %>% #Calculate averages 
  summarise(resolved = mean(resolved), prescription= mean(prescription), follow_up =mean(follow_up), referral = mean(referral),
            general_medicine = mean(general_medicine, na.rm = TRUE), ob_gyn= mean(ob_gyn, na.rm = TRUE), pediatrics = mean(pediatrics, na.rm = TRUE), 
            EDAD = mean(EDAD),male = mean(male), pre_existing = mean(pre_existing, na.rm = TRUE))
tab_provider <- as.data.frame(t(tab_provider)) #Save resulsts 
tab_provider <- setDT(tab_provider, keep.rownames = TRUE)[] #Format labels 
tab_provider <- tab_provider %>% 
  rename(depvar = rn) 
depvar <- c("resolved","prescription","follow_up","referral", "general_medicine","ob_gyn","pediatrics","EDAD","male","pre_existing") #Define dep variables
####################################################################
#Step2) #Apply regressions to the dependent variable and save results in a df 
###################################################################
regresults_pr <- lapply(depvar, function(dv) { #For all variables in depvar 
  pr <- lm_robust(get(dv) ~ Provider, data = DF_provider) #Regression over provider category
  Coeff_new <- head(broom::tidy(pr, conf.int = TRUE), 5) #Save coefficintes 
  Coeff_new <- Coeff_new %>%  #Keep only coefficient for Provider term 
    filter(term == "Provider")
})
#Return list of coefficients
allresults_pr <- data.frame(depvar = depvar, #Compile results into a single table 
                            do.call(rbind, regresults_pr))
tab_provider <- merge(tab_provider, allresults_pr, by= "depvar", all.x = TRUE) #Merge names with depvar 
tab_provider <- tab_provider %>%  #Keep only var name, mean for provider, mean for rest and Provider coeff
  dplyr::select(depvar, V1, V2, estimate, p.value)
tab_provider <- tab_provider %>% #Label columns 
  rename(Variable = depvar, 
         Sample = V1, 
         `Health Insurance Company` = V2,
         Difference = estimate)
tab_provider <- tab_provider[-2,] 
tab_provider <- tab_provider %>% #Label variables 
  mutate(Variable = ifelse(Variable == "EDAD", "Age", 
                           ifelse(Variable == "follow_up", "Follow-up", 
                                  ifelse(Variable == "general_medicine", "General Medicine", 
                                         ifelse(Variable == "male", "Male", 
                                                ifelse(Variable == "ob_gyn", "Ob/Gyn", 
                                                       ifelse(Variable == "pediatrics", "Pediatrics", 
                                                              ifelse(Variable == "pre_existing", "Previously Diagnosed", 
                                                                     ifelse(Variable == "prescription", "Prescription",
                                                                            ifelse(Variable == "resolved", "Resolved", 
                                                                                   ifelse(Variable == "referral", "Referral",NA)))))))))))
tab_provider <- tab_provider %>% #Group variables and order them according to categories
  mutate(Variable = factor(Variable, levels = c("Resolved", "Prescription", "Follow-up", "Referral", "General Medicine", "Ob/Gyn", 
                                                "Pediatrics", "Age", "Male", "Previously Diagnosed")))
tab_provider <- tab_provider[order(Variable),]
tab_Health <- xtable(tab_provider, caption = NULL, label = "Descriptive Statistics by Health Insurace Company", digits = 2) 
tab_Health$Sample <- percent(tab_Health$Sample) #Format averages as percentages for column 1 
tab_Health$`Health Insurance Company` <- percent(tab_Health$`Health Insurance Company`) #Format averages as percentages for column 2
tab_Health$Difference <- percent(tab_Health$Difference) #Format averages as percentages for column 3
tab_Health$`p.value` <- format(round(tab_Health$`p.value`,4),scientific=F) #Format P-value
#Since age is not a percentage format Age (row 8) separately
tab_Health$Sample[8] <- as.numeric(gsub("([0-9]+).*$", "\\1", tab_Health$Sample[8]))/100
tab_Health$`Health Insurance Company`[8] <- as.numeric(gsub("([0-9]+).*$", "\\1", tab_Health$`Health Insurance Company`[8]))/100
tab_Health$Difference[8] <- as.numeric(gsub("([0-9]+).*$", "\\1", tab_Health$Difference[8]))/100
print(xtable(format(tab_Health,digits = 3),type="latex", floating = FALSE, include.rownames = FALSE), file="AppTab3_descriptives.tex") #Output
rm(DF_provider, tab_Health, tab_provider)





